* Session setup: start from a clean state and disable output paging so the
* script runs unattended.
clear all
set more off
* NOTE(review): memory and matsize settings look like legacy configuration;
* confirm whether the Stata release in use still honours or needs them.
set mem 10000000
set matsize 10000
* Interpret the remaining commands under Stata 13 syntax rules.
version 13

****************************************************************** 
*** Build File to Process Raw Census 2011 PCA ********************
****************************************************************** 

** Set file paths
* The global $path_code is not defined in this file, so it must already be set
* by the caller. paths.do presumably defines the $pca and $conc globals used
* below -- TODO confirm.
do "$path_code/paths.do"

********************************************************************************
********************************************************************************

* Load the raw 2011 PCA, standardise geography codes and names, and keep
* village-level rows only
{
use "$pca/pca_census11_india.dta", clear

* Geography identifiers get 2011-suffixed names
rename (State District Subdistt TownVillage) (st_code11 dt_code11 bk_code11 vi_code11)

* Strip one unwanted character from every name. The character is read from
* position 9 of the cleaned name in observation 54340.
* NOTE(review): this depends on the exact row order of the raw file -- confirm
* it still picks up the intended character if the input is ever refreshed.
local junk = substr(upper(trim(itrim(Name[54340]))),9,1)
replace Name = subinstr(Name,"`junk'","",.)

* District name: taken from DISTRICT-level rows and spread to all rows that
* share the same state/district codes
gen temp_dist = upper(trim(itrim(subinstr(Name,"*","",.)))) if Level=="DISTRICT"
egen district11 = mode(temp_dist), by(st_code11 dt_code11)

* Block name: taken from SUB-DISTRICT-level rows and spread to all rows that
* share the same state/district/block codes
gen temp_block = upper(trim(itrim(Name))) if Level=="SUB-DISTRICT"
egen block11 = mode(temp_block), by(st_code11 dt_code11 bk_code11)
assert temp_block=="" | block11==temp_block

* Normalise block names; the substitutions are applied one at a time and the
* order matters
replace block11 = subinstr(block11,"*","",.)
replace block11 = subinstr(block11," -"," ",.)
replace block11 = subinstr(block11,"- "," ",.)
replace block11 = subinstr(block11,"( ","(",.)
replace block11 = subinstr(block11," )",")",.)
replace block11 = subinstr(block11,"-I"," I",.)
replace block11 = subinstr(block11,"-1"," I",.)
replace block11 = subinstr(block11,"(T)","",.)
replace block11 = subinstr(block11,"(S.T)","",.)
replace block11 = subinstr(block11,"(M)","",.)
replace block11 = subinstr(block11,"C.D.BLOCK","",.)
replace block11 = subinstr(block11,"SUB-DIV.","",.)
replace block11 = subinstr(block11,"SUB-DIVISION","",.)
replace block11 = subinstr(block11,"(P)","",.)
replace block11 = subinstr(block11,"CIRCLE","",.)
replace block11 = subinstr(block11,"CD BLOCK","",.)
replace block11 = trim(itrim(block11))

* Manual override for a single block code (bk_code11 is still a string here)
replace block11 = "SIRPUR (T)" if block11=="SIRPUR" & bk_code11=="04315"

order st_code11 dt_code11 district11 bk_code11 block11

* From here on only village rows remain
keep if Level=="VILLAGE"
rename Name village11
replace village11 = upper(trim(itrim(village11)))

* Household and population totals get 2011-suffixed lowercase names
rename (No_HH TOT_P TOT_M TOT_F) (no_hh11 tot_p11 tot_m11 tot_f11)

* Remaining PCA count variables: lowercase the names
foreach v of varlist P_06-NON_WORK_F {
	rename `v' `=lower("`v'")'
}

* Numeric codes, de-duplicate, then assign a running id in current row order
destring st_code11 dt_code11 bk_code11 vi_code11, replace
duplicates drop
gen pca11_id = _n
order pca11_id
}

* Attach 2001 codes and the concordance id from the 2001-2011 concordance,
* keeping matched rows and unmatched PCA rows; no _merge variable is created.
* NOTE(review): this is a many-to-many merge. If the key is expected to be
* unique on one side, confirm that and consider a stricter merge type.
merge m:m st_code11 dt_code11 vi_code11 using "$conc/census_code_matches_names.dta", ///
	keepusing(st_code dt_code vi_code conc_id) keep(master match) nogenerate


* Convert counts into shares. Variables are created in a fixed order because
* later varlist ranges (pct_06 ... marg_ot_f_03) depend on it.
{
* Shares of total population: ages 0-6, scheduled caste, scheduled tribe
foreach grp in 06 sc st {
	gen pct_`grp' = p_`grp'/tot_p11
}

* Literacy and overall worker shares, by sex (p = persons, m = male, f = female)
foreach s in p m f {
	gen lit_`s' = `s'_lit/tot_`s'11
}
foreach s in p m f {
	gen work_`s' = tot_work_`s'/tot_`s'11
}

* Main and marginal workers: overall, then by category (cl, al, hh, ot),
* each as a share of the matching population total
foreach kind in main marg {
	foreach s in p m f {
		gen work_`kind'_`s' = `kind'work_`s'/tot_`s'11
	}
	foreach occ in cl al hh ot {
		foreach s in p m f {
			gen work_`kind'_`occ'_`s' = `kind'_`occ'_`s'/tot_`s'11
		}
	}
}

* Marginal workers in the 0_3 band as a share of all marginal workers,
* overall and by category
foreach s in p m f {
	gen marg_`s'_03 = margwork_0_3_`s'/margwork_`s'
}
foreach occ in cl al hh ot {
	foreach s in p m f {
		gen marg_`occ'_`s'_03 = marg_`occ'_0_3_`s'/marg_`occ'_`s'
	}
}
}


* Label new variables
* Suffix convention in the variable names: _p = persons, _m = male, _f = female.
* NOTE(review): the labels say "%" but the variables appear to be plain ratios
* (count/total) rather than values scaled by 100 -- confirm before reporting.
{
la var pct_06 				"% pop 0-6 years old"
la var pct_sc 				"% pop scheduled caste"
la var pct_st 				"% pop scheduled tribe"
la var lit_p  				"% pop literate"
la var lit_m  				"% male pop literate"
la var lit_f  				"% female pop literate"
la var work_p  				"% pop workers"
la var work_m  				"% male pop workers"
la var work_f  				"% female pop workers"
la var work_main_p  	"% pop main workers"
la var work_main_m  	"% male pop main workers"
la var work_main_f  	"% female pop main workers"
la var work_main_cl_p "% pop main cultivators"
la var work_main_cl_m "% male pop main cultivators"
la var work_main_cl_f "% female pop main cultivators"
la var work_main_al_p "% pop main agri-laborers"
la var work_main_al_m "% male pop main agri-laborers"
la var work_main_al_f "% female pop main agri-laborers"
la var work_main_hh_p "% pop main household industry workers"
la var work_main_hh_m "% male pop main household industry workers"
la var work_main_hh_f "% female pop main household industry workers"
la var work_main_ot_p "% pop main other workers"
la var work_main_ot_m "% male pop main other workers"
la var work_main_ot_f "% female pop main other workers"
la var work_marg_p  	"% pop marg workers"
la var work_marg_m  	"% male pop marg workers"
la var work_marg_f  	"% female pop marg workers"
la var work_marg_cl_p "% pop marg cultivators"
la var work_marg_cl_m "% male pop marg cultivators"
la var work_marg_cl_f "% female pop marg cultivators"
la var work_marg_al_p "% pop marg agri-laborers"
la var work_marg_al_m "% male pop marg agri-laborers"
la var work_marg_al_f "% female pop marg agri-laborers"
la var work_marg_hh_p "% pop marg household industry workers"
la var work_marg_hh_m "% male pop marg household industry workers"
la var work_marg_hh_f "% female pop marg household industry workers"
la var work_marg_ot_p "% pop marg other workers"
la var work_marg_ot_m "% male pop marg other workers"
la var work_marg_ot_f "% female pop marg other workers"
la var marg_p_03	"% marg work < 3 months, total"
la var marg_m_03	"% marg work < 3 months, male"
la var marg_f_03	"% marg work < 3 months, female"
la var marg_cl_p_03	"% marg work < 3 months, cultivators"
la var marg_cl_m_03	"% marg work < 3 months, male cultivators"
la var marg_cl_f_03	"% marg work < 3 months, female cultivators"
la var marg_al_p_03	"% marg work < 3 months, agri-laborers"
la var marg_al_m_03	"% marg work < 3 months, male agri-laborers"
la var marg_al_f_03	"% marg work < 3 months, female agri-laborers"
la var marg_hh_p_03	"% marg work < 3 months, household industry workers"
la var marg_hh_m_03	"% marg work < 3 months, male household industry workers"
la var marg_hh_f_03	"% marg work < 3 months, female household industry workers"
la var marg_ot_p_03	"% marg work < 3 months, other workers"
la var marg_ot_m_03	"% marg work < 3 months, male other workers"
la var marg_ot_f_03	"% marg work < 3 months, female other workers"
}


* Rename and save as 2011 PCA dataset
* Writes two files: the full village dataset (pca_census11.dta) and a slimmer
* version with the count and share variables removed (pca_census11_names.dta).
{
* Move the 2001 codes and concordance id next to the 2011 identifiers
order pca11_id-TRU st_code-conc_id
la var st_code "2001 state code"
la var dt_code "2001 district code"
la var vi_code "2001 village code"
la var st_code11 "2011 state code"
la var dt_code11 "2011 district code"
la var bk_code11 "2011 block code"
la var vi_code11 "2011 village code"
la var district11 "District name"
la var block11 "Block name"
la var village11 "Village name"
la var conc_id "Concordance id"
la var pca11_id "2011 PCA id"

* Suffix every variable in the range p_06 ... marg_ot_f_03 with _11. Any temp*
* helper variables that sit inside this range are renamed too, and are removed
* by the temp* wildcard below.
foreach v of varlist p_06-marg_ot_f_03 {
  rename `v' `v'_11
}

* Rows with a missing 2001 state code (no concordance match) are not affected
* by this filter, because inlist() is false for missing values.
drop if inlist(st_code,4,7,25,26,30,31,34,35)  // drop non-RGGVY states
drop Level EB Ward TRU temp*
duplicates drop
compress
save "$pca/pca_census11.dta", replace

* The count variables now carry the _11 suffix, so they are referenced by
* their full names here rather than relying on Stata's variable-name
* abbreviation (which breaks under -set varabbrev off-).
drop  p_06_11-non_work_f_11 pct_06_11-marg_ot_f_03_11
save "$pca/pca_census11_names.dta", replace
}

********************************************************************************
********************************************************************************
